% Conference paper entry (SIBGRAPI, 2017). Notes for maintainers:
%  - An entry may carry only one `url`; it holds the DOI resolver link. The
%    urlib.net repository link lives in the non-standard `ibi-url` field,
%    which standard styles ignore.
%  - Acronyms/proper names in `title` are brace-protected so sentence-casing
%    styles do not lowercase them.
@InProceedings{CamilloShin:2017:CaStUs,
author = "Camillo, Mario and Shin-Ting, Wu",
affiliation = "{University of Campinas} and {University of Campinas}",
title = "Accessing {CUDA} features in the {OpenGL} rendering pipeline: A case
study using {N-Body} simulation",
booktitle = "Proceedings...",
year = "2017",
editor = "Torchelsen, Rafael Piccin and Nascimento, Erickson Rangel do and
Panozzo, Daniele and Liu, Zicheng and Farias, Myl{\`e}ne and
Viera, Thales and Sacht, Leonardo and Ferreira, Nivan and Comba,
Jo{\~a}o Luiz Dihl and Hirata, Nina and Schiavon Porto, Marcelo
and Vital, Creto and Pagot, Christian Azambuja and Petronetto,
Fabiano and Clua, Esteban and Cardeal, Fl{\'a}vio",
organization = "Conference on Graphics, Patterns and Images, 30. (SIBGRAPI)",
publisher = "IEEE Computer Society",
address = "Los Alamitos",
keywords = "GLSL, CUDA, n-body simulation, OpenGL",
abstract = "The advances of the graphics programing unit (GPU) architecture
and its rapidly evolving towards general purpose GPU make a series
of applications adopt a general purpose (GPGPU) and a graphics
computing interoperability approach in which the first is used for
heavy calculations and the second for 3D graphics rendering.
Because GPGPU exposes several hardware features, such as shared
memory and thread synchronization mechanism, it allows a developer
to write more efficient code. Nevertheless, we conjecture that
such hardware features are also available in the graphics
computing interface OpenGL 4.5 or later through the graphics
concepts: blending, transform feedback, tessellation and
instancing. In this paper we assess our conjecture by implementing
an N-body simulation with both approaches. We indeed devise a
novel non-graphics application to the tessellation hardware and
the instanced rendering circuit. Instead of refining a mesh, we
use the abstract patch for gaining direct accesses to shared
memory. In the place of drawing multiple objects, we apply the
instanced rendering technology for improving sequential data
accesses. Comparative timing analysis is provided. We believe that
these results provide better understanding of the graphics
features that are useful for closing the performance gap between
OpenGL and a GPGPU architecture, and open a new perspective on
implementing solely with the OpenGL graphics applications that
require both intense, but pre-specified, memory accesses and 3D
graphics rendering.",
conference-location = "Niter{\'o}i, RJ, Brazil",
conference-year = "17-20 Oct. 2017",
doi = "10.1109/SIBGRAPI.2017.48",
url = "http://dx.doi.org/10.1109/SIBGRAPI.2017.48",
language = "en",
ibi = "8JMKD3MGPAW/3PFRE42",
ibi-url = "http://urlib.net/ibi/8JMKD3MGPAW/3PFRE42",
targetfile = "30-camera-ready.pdf",
urlaccessdate = "2024, May 01"
}